Code
# Global knitr chunk options: echo the code of every chunk, hide warnings and
# messages in the rendered output, and center figures rendered at retina
# resolution.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# NOTE: the former `rm(list = ls())` call was removed on purpose. It silently
# deletes every object in the workspace of anyone who runs this chunk
# interactively, and it does not reset loaded packages or options, so it gives
# no real guarantee of a clean state. Render in a fresh R session instead.
library (tinytex)
Warning: package 'tinytex' was built under R version 4.5.2
Code
library (ggplot2)
#library(table1)
library (gt)
library (survival)
library (data.table)
library (randomForest)
library (grf)
library (policytree)
library (DiagrammeR)
#library(grid)
#library(forestploter)
#library(randomizr)
# library(devtools)
# install_github("larry-leon/weightedsurv", force = TRUE)
#install.packages("weightedsurv")
# install_github("larry-leon/forestsearch", force = TRUE)
library (forestsearch)
library (weightedsurv)
# Make ggplot2's minimal theme (12-point base font) the default for all
# plots drawn after this point
theme_set (theme_minimal (base_size = 12 ))
Summary
Reproducing main GBSG analysis
Data setup
Code
# Build the analysis data set from `gbsg`.
# NOTE(review): `gbsg` is presumably the German Breast Cancer Study Group data
# made available by one of the packages attached above -- confirm the source.
df.analysis <- gbsg
df.analysis <- within(df.analysis, {
  # Sequential subject identifier. seq_len() is used instead of 1:nrow() so
  # that an empty data set yields an empty id vector rather than c(1, 0).
  id <- as.numeric(seq_len(nrow(df.analysis)))
  # Follow-up time rescaled to months (30.4375 = 365.25 / 12);
  # assumes rfstime is recorded in days -- TODO confirm
  time_months <- rfstime / 30.4375
  # Indicator (1/0) for tumor grade 3
  grade3 <- ifelse(grade == "3", 1, 0)
  # Copy of the treatment indicator under a generic name
  treat <- hormon
})

# Column names shared by the analysis calls below
confounders.name <- c("age", "meno", "size", "grade3", "nodes", "pgr", "er")
outcome.name <- "time_months"
event.name <- "status"
id.name <- "id"
treat.name <- "hormon"
Kaplan-Meier curves and baseline summary
Code
# Counting-process data for the Kaplan-Meier display, built from the shared
# outcome / event / treatment column names (by.risk = 6).
dfcount <- df_counting(
  df         = df.analysis,
  tte.name   = outcome.name,
  event.name = event.name,
  treat.name = treat.name,
  by.risk    = 6
)

# Kaplan-Meier curves by arm with confidence intervals and the log-rank result
plot_weighted_km(
  dfcount,
  conf.int      = TRUE,
  show.logrank  = TRUE,
  ymax          = 1.05,
  xmed.fraction = 0.775,
  ymed.offset   = 0.125
)
Code
# Baseline characteristics by treatment arm: continuous covariates plus tumor
# grade (both the original grade and the grade-3 indicator).
create_summary_table(
  data             = df.analysis,
  treat_var        = treat.name,
  table_title      = "GBSG Characteristics by Treatment Arm",
  vars_continuous  = c("age", "nodes", "size", "er", "pgr"),
  vars_categorical = c("grade", "grade3"),
  font_size        = 12
)
Characteristic
Control (n=440)
Treatment (n=246)
P-value
SMD
age
Mean (SD)
51.1 (10.0)
56.6 (9.4)
<0.001
0.57
nodes
Mean (SD)
4.9 (5.6)
5.1 (5.3)
0.665
0.03
size
Mean (SD)
29.6 (14.4)
28.8 (14.1)
0.470
0.06
er
Mean (SD)
79.7 (124.2)
125.8 (191.1)
<0.001
0.30
pgr
Mean (SD)
102.0 (170.0)
124.3 (249.7)
0.213
0.11
grade
0.273
0.06
1
48 (10.9%)
33 (13.4%)
2
281 (63.9%)
163 (66.3%)
3
111 (25.2%)
50 (20.3%)
grade3
0.174
0.05
0
329 (74.8%)
196 (79.7%)
1
111 (25.2%)
50 (20.3%)
GRF analysis
Code
## GRF
# Generalized random forest subgroup search on the analysis data, allowing
# trees up to depth 2; details = TRUE prints the leaf summaries and splits.
grf_est1 <- grf.subg.harm.survival(
  data             = df.analysis,
  confounders.name = confounders.name,
  outcome.name     = outcome.name,
  event.name       = event.name,
  id.name          = id.name,
  treat.name       = treat.name,
  maxdepth         = 2,
  n.min            = 60,
  dmin.grf         = 12,
  frac.tau         = 0.6,
  details          = TRUE
)
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
Code
# NOTE: labelling convention used for the GRF trees plotted here
#   leaf 1 --> recommend control
#   leaf 2 --> recommend treatment
# Plot the depth-1 tree stored in grf_est1, labelling its leaves accordingly
plot (grf_est1$ tree1,leaf.labels= c ("Control" ,"Treat" ))
Code
# Plot the depth-2 tree stored in grf_est1, with the same leaf labels as the
# depth-1 plot
plot (grf_est1$ tree2,leaf.labels= c ("Control" ,"Treat" ))
Forestsearch with depth=2 (maxk = 2)
Code
# Setup parallel processing: register the future-based foreach backend and
# the doRNG backend for the %dorng% / %dopar% loops used downstream.
library(doFuture)
library(doRNG)
registerDoFuture()
registerDoRNG()

# Run ForestSearch (two-factor subgroups, maxk = 2) and time it.
# The column-name arguments use the shared *.name variables defined in the
# data-setup chunk, matching the GRF call above, so a renamed column only has
# to be changed in one place.
system.time({
  fs <- forestsearch(
    df.analysis,
    confounders.name = confounders.name,
    outcome.name = outcome.name,
    treat.name = treat.name,
    event.name = event.name,
    id.name = id.name,
    potentialOutcome.name = NULL,
    df.test = NULL,
    flag_harm.name = NULL,
    hr.threshold = 1.0,
    hr.consistency = 0.9,
    pconsistency.threshold = 0.90,
    sg_focus = "hr",
    max_subgroups_search = 30,
    use_twostage = TRUE,
    showten_subgroups = TRUE,
    details = TRUE,
    conf_force = NULL,
    cut_type = "default",
    use_grf = TRUE,
    plot.grf = TRUE,
    use_lasso = FALSE,
    maxk = 2,
    fs.splits = 1000,
    n.min = 60,
    d0.min = 10,
    d1.min = 10,
    plot.sg = TRUE,
    by.risk = 6,
    # NOTE(review): 30 workers is machine-specific; adjust to the cores available
    parallel_args = list(plan = "callr", workers = 30, show_message = TRUE)
  )
})
=== Two-Stage Consistency Evaluation Enabled ===
Stage 1 screening splits: 30
Maximum total splits: 1000
Batch size: 20
================================================
GRF stage for cut selection with dmin, tau = 12 0.6
tau, maxdepth = 46.75811 2
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
2 3 -4.10 604.00 1.06 1
11 4 -7.90 112.00 2.81 2
21 5 3.86 177.00 1.87 2
4 7 -5.89 356.00 1.33 2
Selected subgroup:
leaf.node control.mean control.size control.se depth
1 2 6.49 82.00 3.34 1
GRF subgroup found
Terminating node at max.diff (sg.harm.id):
[1] "er <= 0"
All splits:
[1] "er <= 0" "age <= 50" "age <= 43"
GRF cuts identified: 3
Cuts: er <= 0, age <= 50, age <= 43
# of continuous/categorical characteristics 5 2
Continuous characteristics: age size nodes pgr er
Categorical characteristics: meno grade3
Default cuts included (1st 20) age <= mean(age) age <= median(age) age <= qlow(age) age <= qhigh(age) size <= mean(size) size <= median(size) size <= qlow(size) size <= qhigh(size) nodes <= mean(nodes) nodes <= median(nodes) nodes <= qlow(nodes) nodes <= qhigh(nodes) pgr <= mean(pgr) pgr <= median(pgr) pgr <= qlow(pgr) pgr <= qhigh(pgr) er <= mean(er) er <= median(er) er <= qlow(er) er <= qhigh(er)
Categorical: meno grade3
Factors per GRF: er <= 0 age <= 50 age <= 43
Initial GRF cuts included er <= 0 age <= 50 age <= 43
===== CONSOLIDATED CUT EVALUATION (IMPROVED) =====
Evaluating 25 cut expressions once and caching...
Cut evaluation summary:
Total cuts: 25
Valid cuts: 25
Errors: 0
✓ All 25 factors validated as 0/1
===== END CONSOLIDATED CUT EVALUATION =====
# of candidate subgroup factors= 25
[1] "er <= 0" "age <= 50" "age <= 43" "age <= 53.1" "age <= 53"
[6] "age <= 46" "age <= 61" "size <= 29.3" "size <= 25" "size <= 20"
[11] "size <= 35" "nodes <= 5" "nodes <= 3" "nodes <= 1" "nodes <= 7"
[16] "pgr <= 110" "pgr <= 32.5" "pgr <= 7" "pgr <= 131.8" "er <= 96.3"
[21] "er <= 36" "er <= 8" "er <= 114" "meno" "grade3"
Number of possible configurations (<= maxk): maxk = 2 , # combinations = 1275
Events criteria: control >= 10 , treatment >= 10
Subgroup search completed in 0.02 minutes
Found 77 subgroup candidate(s)
# of candidate subgroups (meeting all criteria) = 77
Random seed set to: 8316951
Removed 6 near-duplicate subgroups
Original rows: 77
After removal: 71
# of unique initial candidates: 71
# Restricting to top stop_Kgroups = 30
# of candidates to evaluate: 30
Algorithm: Two-stage sequential
Stage 1 splits: 30
Screen threshold: 0.763
Max total splits: 1000
Batch size: 20
Parallel processing: callr with 30 workers
*** Subgroup found: {er <= 0} !{age <= 43}
% consistency criteria met= 1
SG focus= hr
Subgroup Consistency Minutes= 0.066
Algorithm used: Two-stage sequential
Candidates evaluated: 30
Candidates passed: 13
Subgroup found (FS) with sg_focus='hr'
Selected subgroup: {er <= 0} & !{age <= 43}
Minutes forestsearch overall = 0.1
Consistency algorithm used: twostage
user system elapsed
27.081 1.896 5.845
Code
# Switch the future plan back to sequential execution now that the
# parallel search has finished
plan ("sequential" )
# Build the result tables for the estimation (training) data;
# which_df = "est" is the default and is spelled out here for clarity
res_tabs <- sg_tables (fs, ndecimals = 3 , which_df = "est" )
# Display the table of top candidate subgroups
res_tabs$ sg10_out
Two-factor subgroups (maxk=2)
{er <= 0}
!{age <= 43}
68
38
14
2.164
1.000
{er <= 0}
{size <= 35}
61
34
15
2.537
0.990
{er <= 0}
{pgr <= 32.5}
75
41
16
2.222
0.990
{er <= 0}
{nodes <= 7}
61
31
11
2.335
0.970
{er <= 0}
!{size <= 20}
61
35
12
2.054
0.970
{age <= 50}
{pgr <= 7}
71
36
12
1.707
0.970
!{age <= 43}
{age <= 50}
177
55
18
1.530
0.970
{er <= 0}
82
45
16
1.951
0.960
!{size <= 29.3}
{er <= 8}
76
47
15
1.722
0.960
{er <= 0}
{pgr <= 7}
64
34
13
1.992
0.950
{er <= 8}
!{meno}
84
46
12
1.725
0.910
{grade3}
{pgr <= 7}
72
39
13
1.710
0.910
!{size <= 25}
{er <= 8}
89
55
17
1.553
0.900
Search Configuration: Single-factor candidates (L) = 50; Maximum combinations evaluated = 1,275; Search depth (maxk) = 2
Search Results: Candidate subgroups found = 77; Maximum HR estimate = 2.54
Note: E1 = events in treatment arm; Pcons = consistency proportion
Code
Training data estimates
ITT
686 (100.0%)
246 (35.9%)
299 (43.6%)
66.3
50.2
7.8
0.69 (0.54, 0.89)
Questionable
68 (9.9%)
22 (32.4%)
38 (55.9%)
18.5
47.6
-15
2.16 (1.08, 4.35)
Recommend
618 (90.1%)
224 (36.2%)
261 (42.2%)
66.7
52.6
9.9
0.62 (0.48, 0.81)
Bootstrap Inference
Code
#output_dir <- "dev/vignettes-working/applications/gbsg/results"
# No trailing slash: file.path() inserts the separator itself, so "results/"
# produced paths such as "results//file.RData", and directory names with a
# trailing slash are documented as unsupported by file.exists() on Windows.
output_dir <- "results"
# Results are only written when the output directory already exists
save_results <- dir.exists(output_dir)

# File-name stems used when saving the bootstrap and cross-validation results
fileout_boot <- "gbsg-k2_v5_B=1000"
fileout_cv <- "gbsg-k2_v5_CV=200"

# patchwork is needed for the combined bootstrap plot (if it is not available
# the combined plot will not be produced)
library(patchwork)

# Number of bootstrap samples
NB <- 1000

# Bootstrap bias correction for the ForestSearch estimates, timed
system.time({
  fs_bc <- forestsearch_bootstrap_dofuture(
    fs.est = fs,
    nb_boots = NB,
    show_three = FALSE,
    details = TRUE
  )
})
Ystar matrix generated should be 'boots x N': 1000 x 686
ForestSearch parameters for bootstrap iterations:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1
- hr.consistency: 0.9
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
Bootstrap-specific overrides:
- grf_res: NULL (forces re-selection)
- grf_cuts: NULL (forces re-selection)
- parallel_args: sequential (prevents nested parallelism)
- details: FALSE (suppressed in workers)
- plot.sg: FALSE
- plot.grf: FALSE
=== Bootstrap Analysis Complete ===
Success rate: 96.1% (961/1000)
H (Questionable) Estimates:
Unadjusted: 2.16 (1.08,4.35)
Bias-corrected: 1.64 (0.73,3.69)
Hc (Recommend) Estimates:
Unadjusted: 0.62 (0.48,0.81)
Bias-corrected: 0.65 (0.45,0.94)
===================================
user system elapsed
13210.452 240.624 1074.915
Code
# Back to sequential execution after the bootstrap
plan("sequential")

# Persist the analysis data, the ForestSearch fit and the bootstrap results,
# but only when the output directory was found to exist.
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  save(list = c("df.analysis", "fs", "fs_bc"), file = filename)
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg-k2_v5_B=1000.RData
Diagnostics and Summaries
Code
#load("~/Documents/GitHub/forestsearch/vignettes/results/sim_gbsg_example_B=1000.RData")
# No trailing slash: file.path() adds the separator itself
output_dir <- "results"
load_results <- dir.exists(output_dir)

# Reload the saved bootstrap results when they are available. The directory
# existing does not imply the file does, so check the file itself and fall
# back (with a warning) to the objects already in the session instead of
# failing inside load().
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    warning(
      "Saved results not found: ", filename,
      "; using objects from the current session.",
      call. = FALSE
    )
  }
}

# Summarize the bootstrap results on the hazard-ratio scale, with plots
summaries <- summarize_bootstrap_results(
  sgharm = fs$sg.harm,
  boot_results = fs_bc,
  create_plots = TRUE,
  est.scale = "hr"
)
===============================================================
BOOTSTRAP ANALYSIS SUMMARY
===============================================================
BOOTSTRAP SUCCESS METRICS:
-------------------------------------------------------------
Total iterations: 1000
Successful subgroup ID: 961 (96.1%)
Failed to find subgroup: 39 (3.9%)
TIMING ANALYSIS:
-------------------------------------------------------------
Overall:
Total bootstrap time: 17.89 minutes (0.30 hours)
Average per iteration: 0.02 min (1.1 sec)
Per-iteration timing:
Mean: 0.24 min (14.5 sec)
Median: 0.24 min (14.2 sec)
Std Dev: 0.09 minutes
Range: [0.05, 0.57] minutes
IQR: [0.17, 0.31] minutes
ForestSearch timing (successful iterations only):
Iterations with FS: 1000 (100.0%)
Mean FS time: 0.24 min (14.5 sec)
Median FS time: 0.24 min (14.2 sec)
Total FS time: 241.64 minutes
FS time % of total: 1351.0%
Overhead timing (Cox models, bias correction, etc.):
Mean overhead: 0.00 min (0.0 sec)
Median overhead: 0.00 min (0.0 sec)
Total overhead: 0.24 minutes
Overhead % of total: 1.3%
PERFORMANCE ASSESSMENT:
-------------------------------------------------------------
Performance rating: ✓✓✓ Excellent
Average iteration speed: 1.1 seconds
===============================================================
Code
# Extract and display the table of bootstrap bias-corrected subgroup estimates
sg_tab <- summaries$ table
sg_tab
Bootstrap bias-corrected estimates (1000 iterations)
N
NT
Events
MedT
MedC
RMSTd
HR (95% CI)†
HR‡ (95% CI)
Qstnbl
68 (9.9%)
22 (32.4%)
38 (55.9%)
18.5
47.6
-15
2.16 (1.08, 4.35)
1.64 (0.73,3.69)
Recmnd
618 (90.1%)
224 (36.2%)
261 (42.2%)
66.7
52.6
9.9
0.62 (0.48, 0.81)
0.65 (0.45,0.94)
Note : Med = Median survival time (months). RMSTd = Restricted mean survival time difference. Subgroup identified in 96.1% of bootstrap samples.
Code
# Summarize event counts across the bootstrap iterations, flagging arms with
# fewer than `threshold` (12) events
event_summary <- summarize_bootstrap_events (fs_bc, threshold = 12 )
=== Bootstrap Event Count Summary ===
Total bootstrap iterations: 1000
Event threshold: <12 events
ORIGINAL Subgroup H on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
ORIGINAL Subgroup Hc on BOOTSTRAP samples:
Control arm <12 events: 0 (0.0%)
Treatment arm <12 events: 0 (0.0%)
Either arm <12 events: 0 (0.0%)
NEW Subgroups found: 961 (96.1%)
NEW Subgroup H* on ORIGINAL data:
Control arm <12 events: 35 (3.6% of successful)
Treatment arm <12 events: 81 (8.4% of successful)
Either arm <12 events: 113 (11.8% of successful)
NEW Subgroup Hc* on ORIGINAL data:
Control arm <12 events: 0 (0.0% of successful)
Treatment arm <12 events: 0 (0.0% of successful)
Either arm <12 events: 0 (0.0% of successful)
Code
# Display the bootstrap diagnostics table
summaries$ diagnostics_table_gt
Analysis of 1000 bootstrap iterations
Success Rate
Total iterations
1000
Successful subgroup ID
961 (96.1%)
Failed to find subgroup
39 (3.9%)
Success rating
Excellent ✓✓✓
Subgroup H (Questionable)
Unadjusted estimate
2.16 (1.08, 4.35)
Bias-corrected estimate
1.64 (0.73, 3.69)
Bias correction impact
24.2%
CI width change
3.27 -> 2.96
Subgroup Hc (Recommend)
Unadjusted estimate
0.62 (0.48, 0.81)
Bias-corrected estimate
0.65 (0.45, 0.94)
Bias correction impact
4.2%
CI width change
0.33 -> 0.49
Bootstrap Quality: H
Valid iterations
961
Mean (SD)
0.49 (0.49)
Coefficient of variation
98.2%
Skewness
-0.09
Bootstrap Quality: Hc
Valid iterations
961
Mean (SD)
-0.43 (0.21)
Coefficient of variation
49.7%
Skewness
0.23
Search Performance
Mean max HR found
3.19 (1.27)
Mean factors evaluated
47.7
Mean combinations tried
1168
Proportion at maxk
--
Interpretation Guide:
✓ Excellent stability : Subgroup is consistently identified across bootstrap samples.
⚠ High variability : Bootstrap estimates are imprecise (CV >= 25%). Consider increasing nb_boots or sample size.
Code
# How often the bootstrap subgroup matches the subgroup found on the original data
summaries$ subgroup_summary$ original_agreement
Metric Value
<char> <char>
1: Total bootstrap iterations 1000
2: Successful iterations 961
3: Failed iterations (no subgroup) 39
4: Exact match with original 146 (15.2%)
5: Different from original 815 (84.8%)
Code
# Frequency with which each baseline factor appears in the bootstrap subgroups
summaries$ subgroup_summary$ factor_presence
Rank Factor Count Percent
2 1 er 591 61.49844
6 2 pgr 354 36.83663
7 3 size 308 32.04995
1 4 age 245 25.49428
5 5 nodes 140 14.56816
3 6 grade3 131 13.63163
4 7 meno 130 13.52758
Code
# Same frequencies broken down by the specific factor definition (cut)
summaries$ subgroup_summary$ factor_presence_specific
Rank Base_Factor Factor_Definition Count Percent
149 1 er {er <= 0} 282 29.34443
168 2 er {er <= 8} 129 13.42352
171 3 grade3 {grade3} 128 13.31946
77 4 meno !{meno} 126 13.11134
Forest Search n-fold cross-validation
Code
# No trailing slash: file.path() adds the separator itself
output_dir <- "results"
load_results <- dir.exists(output_dir)

# Reload the saved bootstrap results when available. Check the file itself
# rather than only its directory, so a missing file produces a warning and the
# in-session objects are used, instead of an error from load().
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_boot, ".RData"))
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    warning(
      "Saved results not found: ", filename,
      "; using objects from the current session.",
      call. = FALSE
    )
  }
}

# Kfolds = n (default to n-fold cross-validations)
# Reset first so a stale fs_OOB can never be mistaken for a fresh result
fs_OOB <- NULL
fs_OOB <- forestsearch_Kfold(
  fs.est = fs,
  details = TRUE,
  # NOTE(review): 36 workers is machine-specific; adjust to the cores available
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Cross-validation setup:
- Observations: 686
- Folds: 686
- Fold sizes (range): 1-1
ForestSearch parameters for CV folds:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1
- hr.consistency: 0.9
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Cross-validation complete:
- Time: 13.5 minutes
- Subgroup found in 100 % of folds
Any found: 1
Exact match: 0.8746356
At least 1 match: 0.9752187
Cov 1 any: 0.9985423
Cov 2 any: 0.8746356
Cov 1 and 2 any: 0.8746356
Cov 1 exact: 0.9752187
Cov 2 exact: 0.8746356
Agreement (sens, ppv) in H and Hc: 0.8529412 0.9822006 0.8405797 0.9837925
Code
# Return to sequential execution after the parallel cross-validation
plan (sequential)
# Summarize the n-fold cross-validation results; details = TRUE prints the
# agreement metrics and outall = TRUE requests the full set of outputs
summary_OOB <- forestsearch_KfoldOut (res= fs_OOB, details= TRUE , outall= TRUE )
Any found: 1
Exact match: 0.8746356
At least 1 match: 0.9752187
Cov 1 any: 0.9985423
Cov 2 any: 0.8746356
Cov 1 and 2 any: 0.8746356
Cov 1 exact: 0.9752187
Cov 2 exact: 0.8746356
Agreement (sens, ppv) in H and Hc: 0.8529412 0.9822006 0.8405797 0.9837925
Subgroup n n1 m1 m0 RMST
Overall "ITT" "686 (100.0%)" "246 (35.9%)" "66.3" "50.2" "7.8"
FA_0 "Not recommend" "68 (9.9%)" "22 (32.4%)" "18.5" "47.6" "-15"
KfA_0 "Not recommend" "69 (10.1%)" "18 (26.1%)" "27.2" "42.9" "-2.6"
FA_1 "Recommend" "618 (90.1%)" "224 (36.2%)" "66.7" "52.6" "9.9"
KfA_1 "Recommend" "617 (89.9%)" "228 (37.0%)" "66.3" "52.6" "7.5"
Hazard ratio
Overall "0.69 (0.54, 0.89)"
FA_0 "2.16 (1.08, 4.35)"
KfA_0 "1.13 (0.52, 2.47)"
FA_1 "0.62 (0.48, 0.81)"
KfA_1 "0.68 (0.52, 0.88)"
Code
# Tabulate the first column of SGs_found across folds
# (presumably the first factor of each fold's subgroup -- confirm)
table (summary_OOB$ SGs_found[,1 ])
!{age <= 43} !{size <= 29.3} !{size <= 29.4} {er <= 0} {er <= 8}
145 10 1 524 5
{grade3}
1
Code
# Tabulate the second column of SGs_found across folds
# (presumably the second factor of each fold's subgroup -- confirm)
table (summary_OOB$ SGs_found[,2 ])
!{age <= 43} !{size <= 29.3} {er <= 0} {er <= 8} {pgr <= 32}
455 5 145 11 3
{pgr <= 33} {pgr <= 7}
53 11
Code
# Repeated 10-fold cross-validation: Ksims independent repetitions
Ksims <- 200
fs_ten <- forestsearch_tenfold(
  fs.est        = fs,
  sims          = Ksims,
  Kfolds        = 10,
  details       = TRUE,
  parallel_args = list(plan = "callr", workers = 36, show_message = TRUE)
)
Starting repeated K-fold cross-validation:
- Simulations: 200
- Folds per simulation: 10
- Workers: 13
ForestSearch parameters for CV folds:
- sg_focus: hr
- maxk: 2
- fs.splits: 1000
- max_subgroups_search: 30
- hr.threshold: 1
- hr.consistency: 0.9
- pconsistency.threshold: 0.9
- n.min: 60
- use_twostage: TRUE
- use_lasso: FALSE
- use_grf: TRUE
- (per-fold parallel: sequential)
- (per-fold details: FALSE)
- (per-fold plot.sg: FALSE)
Repeated K-fold CV complete:
- Time: 28.68 minutes
- Successful simulations: 200 / 200
- Projected hours per 100 sims: 0.24
Code
# Return to sequential execution after the parallel cross-validation
plan (sequential)
# Summary of the subgroup-finding metrics over the repeated 10-fold runs
print (fs_ten$ find_summary)
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact
0.9 0.1 0.5 0.8 0.2 0.1 0.5
Cov2 exact
0.1
Code
# Summary of sensitivity and PPV for H and Hc over the repeated 10-fold runs
print (fs_ten$ sens_summary)
sens_H sens_Hc ppv_H ppv_Hc
0.6029412 0.9498382 0.5656334 0.9560618
Code
# First rows of the sensitivity / PPV results (one row per simulation, presumably)
print (head (fs_ten$ sens_out))
sens_H sens_Hc ppv_H ppv_Hc
[1,] 0.5588235 0.9530744 0.5671642 0.9515347
[2,] 0.5294118 0.9433657 0.5070423 0.9479675
[3,] 0.7352941 0.9498382 0.6172840 0.9702479
[4,] 0.5294118 0.9126214 0.4000000 0.9463087
[5,] 0.5294118 0.9385113 0.4864865 0.9477124
[6,] 0.4705882 0.9352751 0.4444444 0.9413681
Code
# First rows of the subgroup-finding metrics (one row per simulation, presumably)
print (head (fs_ten$ find_out))
Any Exact At least 1 Cov1 Cov2 Cov 1 & 2 Cov1 exact Cov2 exact
[1,] 0.9 0.2 0.6 0.7 0.3 0.2 0.5 0.3
[2,] 0.9 0.0 0.4 0.8 0.0 0.0 0.4 0.0
[3,] 1.0 0.2 0.7 0.8 0.3 0.2 0.6 0.3
[4,] 1.0 0.1 0.5 0.6 0.3 0.1 0.3 0.3
[5,] 0.9 0.0 0.3 0.8 0.1 0.1 0.3 0.0
[6,] 0.9 0.2 0.5 0.7 0.4 0.2 0.3 0.4
Code
# Save all results
# No trailing slash: file.path() adds the separator itself, which avoids
# paths such as "results//file.RData".
output_dir <- "results"
# Results are only written when the output directory already exists
save_results <- dir.exists(output_dir)
if (save_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  # Analysis data, ForestSearch fit, bootstrap and both cross-validation results
  save(df.analysis, fs, fs_bc, fs_ten, fs_OOB, file = filename)
  cat(" \n Results saved to:", filename, " \n ")
}
Results saved to: results//gbsg-k2_v5_CV=200.RData
Code
# No trailing slash: file.path() adds the separator itself
output_dir <- "results"
load_results <- dir.exists(output_dir)

# Reload the saved cross-validation results when available. Check the file
# itself rather than only its directory, so a missing file produces a warning
# and the in-session objects are used, instead of an error from load().
if (load_results) {
  filename <- file.path(output_dir, paste0(fileout_cv, ".RData"))
  if (file.exists(filename)) {
    load(file = filename)
  } else {
    warning(
      "Saved results not found: ", filename,
      "; using objects from the current session.",
      call. = FALSE
    )
  }
}
# Define the reference subgroups to display.
# Each entry uses the same text as both its subsetting expression and its
# display name, and is tagged as a "reference" subgroup.
subgroups <- lapply(
  c(
    age_gt65     = "age > 65",
    age_lt65     = "age <= 65",
    pgr_positive = "pgr > 0",
    pgr_negative = "pgr <= 0"
  ),
  function(expr) {
    list(subset_expr = expr, name = expr, type = "reference")
  }
)
# Forest plot combining the ForestSearch fit, the bootstrap bias correction
# and both cross-validation results with the reference subgroups.
# Column names are given as literals so this chunk can run from the reloaded
# results alone.
result <- plot_subgroup_results_forestplot(
  fs_results = list(
    fs.est   = fs,
    fs_bc    = fs_bc,
    fs_OOB   = fs_OOB,
    fs_kfold = fs_ten
  ),
  df_analysis      = df.analysis,
  subgroup_list    = subgroups,
  outcome.name     = "time_months",
  event.name       = "status",
  treat.name       = "hormon",
  E.name           = "Hormon",
  C.name           = "CT",
  ci_column_spaces = 25
)

# Render the assembled forest plot
plot(result$plot)